% Link Penn World Table 9.1 (PWT) and Maddison Project 2018 data for all countries in PWT

% Start from an empty workspace; every variable used below is rebuilt here.
clear all;

% -- Output directories, relative to the current folder.  The trailing slash
%    is part of the string because file names are appended by concatenation.
matdir = 'mat/';   % saved .mat files
outdir = 'out/';   % CSV tables
figdir = 'fig/';   % figures (referenced only by the disabled plotting section)

% Small tolerance constant (not referenced in the visible part of this script).
small = 1e-10;


% ---- Read Data .. PWT ----
% Cell ranges relied on below: row 1 -> country_code, row 2 -> country,
% A3:A70 -> year, B3:GA70 -> one data column per country.
% t1/t2 only absorb xlsread outputs that are not needed.
xlsname = '../data/pwt91_20191101.xlsx';
[t1,country_code,t2] = xlsread(xlsname,'population','B1:GA1');
[t1,country,t2]      = xlsread(xlsname,'population','B2:GA2');
[year,t1,t2]         = xlsread(xlsname,'population','A3:A70');
[pop,t1,t2]          = xlsread(xlsname,'population','B3:GA70');
[rgdpna,t1,t2]       = xlsread(xlsname,'rgdpna','B3:GA70');

% Keep PWT-specific copies: the generic names (year, pop, country_code, ...)
% are overwritten when the Maddison file is read.
year_pwt         = year;
country_pwt      = country;
country_code_pwt = country_code;
p_pwt            = pop;
y_pwt            = rgdpna;
yp_pwt           = rgdpna./pop;     % GDP divided by population, element by element

% Countries to process: the PWT codes, transposed (row range -> column list).
benchmark_list = country_code.';

% ---- Read Data .. Maddison (mpd2018) ----
xlsname = '../data/mpd2018.xlsx';
[t1,country_code,t2] = xlsread(xlsname,'rgdpnapc','B2:FN2');
[year,t1,t2]         = xlsread(xlsname,'rgdpnapc','A3:A743');
[rgdpna_pc,t1,t2]    = xlsread(xlsname,'rgdpnapc','B3:FN743');
[pop,t1,t2]          = xlsread(xlsname,'pop','B3:FN743');

% Maddison-specific copies of the series just read
country_code_mad = country_code;
year_mad = year;
yp_mad   = rgdpna_pc;
p_mad    = pop/1000;   % Convert to millions

% Append one all-missing row dated 2017 so that the Maddison arrays end in the
% same year as PWT (the end-date check further down requires this).
year_mad(end+1,1) = 2017;
p_mad(end+1,:)    = NaN;
yp_mad(end+1,:)   = NaN;

% Sample window for Maddison.  smpl is an external helper; the rows where it
% returns 1 are kept (called with 1900 as start and 2017 as end).
ismpl = smpl(year_mad,[1900 1],[2017 1],1);   % Start in 1900
year_mad = year_mad(ismpl==1);
yp_mad   = yp_mad(ismpl==1,:);
p_mad    = p_mad(ismpl==1,:);

% The trimmed Maddison calendar is the time axis of the linked data set.
year_linked_all = year_mad;
n_T = size(year_linked_all,1);

% Both sources must end in the same year before they can be stacked row by row.
if year_mad(end) ~= year_pwt(end)
    error('misaligned dates');
end

% np = number of years by which the Maddison sample starts before PWT.
% Prepend that many NaN rows to each PWT array so that row t refers to the
% same year in both sources.  Note that year_pwt(1) is NaN after this step
% whenever np > 0.
np = (year_pwt(1)-year_mad(1));
nc = size(benchmark_list,1);
p_pwt    = vertcat(NaN(np,size(p_pwt,2)),p_pwt);
yp_pwt   = vertcat(NaN(np,size(yp_pwt,2)),yp_pwt);
year_pwt = vertcat(NaN(np,1),year_pwt);

% Link data from pwt91
% For every PWT country build one series on the full (Maddison) time axis:
%   - country code not found in Maddison (colnumber returns 0): the NaN-padded
%     PWT series is used as is;
%   - otherwise: PWT values are used from the first PWT observation onward, and
%     Maddison values, rescaled so that they equal PWT in that first year, are
%     used for all earlier years.
yp_linked_all = NaN(n_T,nc);
pop_linked_all = NaN(n_T,nc);
tvec = (1:1:n_T)';

% The PWT arrays have already been padded with leading NaN rows, so they must
% have exactly one row per linked year.  A start row must NOT be derived from
% year_pwt(1): that element is NaN after padding, and min() over all-NaN input
% returns index 1, which only matched the padded arrays by accident.
if size(yp_pwt,1) ~= n_T || size(p_pwt,1) ~= n_T
    error('PWT arrays are not aligned with the linked time axis');
end

for ic = 1:nc
    ss = char(benchmark_list(ic));
    jj_pwt = colnumber(ss,country_code_pwt);
    jj_mad = colnumber(ss,country_code_mad);
    if jj_mad == 0
        % No Maddison counterpart: copy the padded PWT columns unchanged.
        yp_linked_all(:,ic) = yp_pwt(:,jj_pwt);
        pop_linked_all(:,ic) = p_pwt(:,jj_pwt);
    else
        % packr is an external helper; presumably it drops rows containing
        % NaN, so row 1 is the first PWT observation -- TODO confirm.
        % The assignments below also assume the PWT series has no gaps after
        % its first observation (otherwise the row counts do not match).

        % GDP per capita: splice rescaled Maddison onto PWT.
        obs = packr([tvec yp_pwt(:,jj_pwt)]);
        t0 = obs(1,1);                          % row index of the link year
        rat = obs(1,2)/yp_mad(t0,jj_mad);       % PWT / Maddison in the link year
        yp_linked_all(1:t0,ic) = yp_mad(1:t0,jj_mad)*rat;
        yp_linked_all(t0:end,ic) = obs(:,2);

        % Population: same splice, with its own link year and ratio.
        obs = packr([tvec p_pwt(:,jj_pwt)]);
        t0 = obs(1,1);
        rat = obs(1,2)/p_mad(t0,jj_mad);
        pop_linked_all(1:t0,ic) = p_mad(1:t0,jj_mad)*rat;
        pop_linked_all(t0:end,ic) = obs(:,2);
    end
end

% Save Data for future use: each variable goes into its own .mat file in matdir.
country_linked_all      = country_pwt;
country_code_linked_all = benchmark_list;

fstr = [matdir 'year_linked_all_pwt91'];
save(fstr,'year_linked_all');

fstr = [matdir 'country_linked_all_pwt91'];
save(fstr,'country_linked_all');

fstr = [matdir 'country_code_linked_all_pwt91'];
save(fstr,'country_code_linked_all');

fstr = [matdir 'yp_linked_all_pwt91'];
save(fstr,'yp_linked_all');

fstr = [matdir 'pop_linked_all_pwt91'];
save(fstr,'pop_linked_all');

% Compute number of years of data at end of sample:
% for each country, the length of the unbroken run of non-missing values of
% yp_linked_all counted backwards from the last year.
yp_linked_nyears_all = zeros(nc,1);
for ic = 1:nc
    % Reverse the series so that element 1 is the last year of the sample.
    reversed = flipud(yp_linked_all(:,ic));
    first_nan = find(isnan(reversed),1);
    if isempty(first_nan)
        % No missing value anywhere: the whole sample counts.
        yp_linked_nyears_all(ic) = n_T;
    else
        % Everything before the first NaN (in reversed order) is the run.
        % The branches are mutually exclusive, so a series whose only NaN is
        % in the first sample year gets n_T-1 and is not overwritten by n_T.
        yp_linked_nyears_all(ic) = first_nan-1;
    end
%     ss = char(country_linked_all(ic));
%     fprintf([ss '\n']);
%     fprintf('%4i \n\n',yp_linked_nyears_all(ic));
end

% Sample-selection rule.  A country is included (ii == 1) only if
%   ii1: it has at least iyears years of data at the end of the sample, and
%   ii2: its population in the last sample year is at least ipop
%        (population units presumably millions -- confirm against the data).
iyears = 50;
ipop = 3;
ii2 = (pop_linked_all(end,:).' >= ipop);
ii1 = (yp_linked_nyears_all >= iyears);
ii = double(ii1 & ii2);   % 0/1 flag stored as double, one entry per country

% List series not included
% Writes one CSV row per excluded country (ii == 0) with seven fields:
% country code, last-year GDP per capita, last-year population, years of data
% at the end of the sample, and the flags ii1 (enough years), ii2 (enough
% population) and ii (included).  The header names exactly these seven fields.

outfile_name = [outdir 'TableA2_Excluded_Countries.csv'];
fileID = fopen(outfile_name,'w');
if fileID == -1
    % fopen returns -1 when the file cannot be opened, e.g. missing outdir.
    error('could not open %s for writing',outfile_name);
end
try
    fprintf(fileID,'Country,GDPpc(2017),Pop(2017),nyears,ok_years,ok_pop,included \n');
    for i = 1:nc
        if (ii(i) == 0)
            ss = char(benchmark_list(i));
            % Pass the code as data, not as the format string, so that any
            % '%' or '\' in it is written literally.
            fprintf(fileID,'%s,',ss);
            fprintf(fileID,'%8.1f ,',yp_linked_all(end,i));
            fprintf(fileID,'%8.1f ,',pop_linked_all(end,i));
            fprintf(fileID,'%4i ,',yp_linked_nyears_all(i));
            fprintf(fileID,'%2i,%2i, %2i \n',[ii1(i) ii2(i) ii(i)]);
        end
    end
catch write_err
    % Do not leave the handle open if a write fails.
    fclose(fileID);
    rethrow(write_err);
end
% Close the file so the contents are flushed and the handle is released.
fclose(fileID);

% Share of last-year totals accounted for by the included countries.
% Last-year values are split by the inclusion flag ii (1 = included, 0 = not).
% A NaN in any last-year value propagates through sum() into the shares.
pop_incl = pop_linked_all(end,ii==1).';
pop_excl = pop_linked_all(end,ii==0).';
yp_incl  = yp_linked_all(end,ii==1).';
yp_excl  = yp_linked_all(end,ii==0).';

% Level of GDP = population x GDP per capita
y_incl = pop_incl.*yp_incl;
y_excl = pop_excl.*yp_excl;

p_incl_frac = sum(pop_incl)/(sum(pop_incl)+sum(pop_excl));
y_incl_frac = sum(y_incl)/(sum(y_incl)+sum(y_excl));

% Plot data for countries that we will use;
%{
for ic = 1:nc;
  if (ii(ic) == 1);
    ss = char(benchmark_list(ic));
    jj_pwt = colnumber(ss,country_code_pwt);
    jj_mad = colnumber(ss,country_code_mad);
    ss1 = char(country_code_pwt(jj_pwt));
    ypwt = yp_pwt(:,jj_pwt);
    d_ypwt = 100*dif(log(ypwt),1);
    ppwt = p_pwt(:,jj_pwt);
    d_ppwt = 100*dif(log(ppwt),1);
    if jj_mad == 0;
        ymad = NaN(n_T,1);
        pmad = NaN(n_T,1);
    else;
        ymad = yp_mad(:,jj_mad);
        pmad = p_mad(:,jj_mad);
    end;
    d_ymad = 100*dif(log(ymad),1);
    d_pmad = 100*dif(log(pmad),1);
    
    fig=figure;
    subplot(3,2,1);
      plot(year_linked_all,ypwt.*ppwt,'- r');
      hold on;
       plot(year_linked_all,ymad.*pmad,'- b');
      hold off
      str = char(country_pwt(jj_pwt));
      str = [str '  gdp (log)'];
      title(str);
      set(gca, 'YScale', 'log');
      xlim([1900 2020]);
      
    subplot(3,2,2);
      plot(year_linked_all,d_ypwt-d_ppwt,'- r');
      hold on;
       plot(year_linked_all,d_ymad-d_pmad,'- b');
      hold off
      str = ['  gdp (growth rate)'];
      title(str);
      xlim([1900 2020]); 
    
    subplot(3,2,3);
      plot(year_linked_all,ypwt,'- r');
      hold on;
       plot(year_linked_all,ymad,'- b');
      hold off
      str = ['  gdp per cap (log)'];
      title(str);
      set(gca, 'YScale', 'log');
      xlim([1900 2020]);
      
    subplot(3,2,4);
      plot(year_linked_all,d_ypwt,'- r');
      hold on;
       plot(year_linked_all,d_ymad,'- b');
      hold off
      str = ['  gdp per cap (growth rate)'];
      title(str);
      xlim([1900 2020]);
      
    subplot(3,2,5);
      plot(year_linked_all,ppwt,'- r');
      hold on;
       plot(year_linked_all,pmad,'- b');
      hold off
      str = char(country_pwt(jj_pwt));
      str = [str '  population (log)'];
      title(str);
      set(gca, 'YScale', 'log');
      xlim([1900 2020]);
      
    subplot(3,2,6);
      plot(year_linked_all,d_ppwt,'- r');
      hold on;
       plot(year_linked_all,d_pmad,'- b');
      hold off
      str = ['  population (growth rate)'];
      title(str);
      xlim([1900 2020]);
      
      orient landscape;
      fig_name = ['PWT_Maddison_Data_' ss1];
      print(fig, [figdir fig_name], '-dpdf','-bestfit');
      close gcf;
      
    end;
end;
%}